#encoding=utf8
import re

# Characters treated as noise: ASCII letters and digits, ASCII punctuation
# (including the backslash) and common full-width / CJK punctuation marks.
# Raw string so that `\-`, `\[`, `\\` and `\]` reach the regex engine intact.
_NOISE_RE = re.compile(
	r'[a-zA-Z0-9’!"#$%&\'()*+,\-./:：;；|<=>?@，—。?★、…【】《》？“”‘’！\[\\\]^_`{|}~]+'
)
# Any run of whitespace (spaces, tabs, newlines, ...).
_WHITESPACE_RE = re.compile(r'\s+')


def textParse(str_txt: str) -> str:
	"""Strip ASCII letters, digits and punctuation from a piece of text.

	Every run of noise characters (ASCII letters, digits, ASCII punctuation
	and common full-width punctuation) is replaced by a single space, and
	every run of whitespace -- newlines included -- is then collapsed to a
	single space. Leading and trailing spaces are not trimmed.

	Args:
		str_txt: The text to clean.

	Returns:
		The cleaned text, containing no newlines and no consecutive
		whitespace characters.
	"""
	# Replace noise with a space rather than deleting it, so the text on
	# either side of a removed run stays separated.
	str_txt = _NOISE_RE.sub(' ', str_txt)
	# Collapsing whitespace also removes every newline, so no separate
	# newline-stripping step is needed.
	return _WHITESPACE_RE.sub(' ', str_txt)


def readFile(path):
	"""Return the full contents of the UTF-8 text file at `path`.

	The file is opened in text mode and closed again before returning.
	"""
	with open(path, mode='r', encoding='utf-8') as handle:
		return handle.read()


if __name__ == '__main__':
	# Demo run: print one sample article as read from disk, then print it
	# again after cleaning. The path is relative to the working directory.
	sample_path = r'../CSCMNews/体育/0.txt'
	raw_text = readFile(sample_path)
	print(raw_text)
	print(textParse(raw_text))